package com.frame.assist.htmlparser;

import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import org.htmlparser.NodeFilter;
import org.htmlparser.Parser;
import org.htmlparser.filters.TagNameFilter;
import org.htmlparser.util.NodeList;
import org.htmlparser.util.ParserException;
/**
 * @Title: System
 * @Description: Helpers for reading GBK-encoded HTML files and extracting nodes by tag name.
 * @Copyright: Copyright (c) 2013
 * @Company:
 * @author 王海锋
 * @CreatedTime: 2013-3-11 20:41:54
 * @version 1.0
 */

public class Htmlparser {

    /** Character encoding used both to decode input files and to configure the parser. */
    private static final String CHARSET = "GBK";

    /**
     * Reads the whole stream as GBK-encoded text and returns it as a single string.
     *
     * <p>The content is read line by line; every line (including the last one) is
     * followed by a single {@code "\n"} in the result, so the original line
     * terminators are normalized and a trailing newline is always present for
     * non-empty input.
     *
     * <p>The supplied stream is always closed before this method returns, whether
     * reading succeeds or fails.
     *
     * @param filein the stream to read; it is consumed and closed by this method
     * @return the decoded content, or an empty string if the stream has no data
     * @throws IOException if the GBK charset is not supported or reading fails
     */
    public String readHtmlFile(FileInputStream filein) throws IOException {
        StringBuilder html = new StringBuilder();
        // Register the raw stream as a resource too, so it is closed even when
        // constructing the reader fails (e.g. unsupported charset). Closing the
        // reader first and the stream afterwards is harmless.
        try (FileInputStream in = filein;
             BufferedReader reader = new BufferedReader(new InputStreamReader(in, CHARSET))) {
            String line;
            while ((line = reader.readLine()) != null) {
                html.append(line).append('\n');
            }
        }
        return html.toString();
    }

    /**
     * Parses the given HTML text and collects all nodes matched by a
     * {@link TagNameFilter} built from {@code tag}.
     *
     * @param html the HTML source text to parse
     * @param tag  the tag name handed to {@link TagNameFilter}
     * @return the nodes returned by {@code extractAllNodesThatMatch} for that filter
     * @throws ParserException if the underlying parser reports a failure
     */
    public NodeList getNodeListByTag(String html, String tag) throws ParserException {
        NodeFilter filter = new TagNameFilter(tag);
        Parser parser = Parser.createParser(html, CHARSET);
        return parser.extractAllNodesThatMatch(filter);
    }
}
